{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Imports and Initials"
      ],
      "metadata": {
        "id": "7BsCCzbuXx2T"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "import random\n",
        "import shutil\n",
        "from datetime import datetime\n",
        "\n",
        "import numpy as np\n",
        "from PIL import Image"
      ],
      "metadata": {
        "id": "AKml8sjmRzke"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# `bowl` is the folder the later cells copy the mixed EKMs into and read them from,\n",
        "# so it has to exist before any of them runs (-p keeps the cell re-runnable)\n",
        "! mkdir -p mixed_dataset /content/bowl"
      ],
      "metadata": {
        "id": "GHMiyTR1X2wO"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "metadata": {
        "id": "8I9bP74g9dAf"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Downloading dataset(s)"
      ],
      "metadata": {
        "id": "_FEGAhKeNGp1"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Downloading the MIT-BIH dataset\n",
        "# fs = 360\n",
        "# Containing Anomaly\n",
        "# Should get all possible EKMs\n",
        "! wget -r -N -c -np https://physionet.org/files/mitdb/1.0.0/"
      ],
      "metadata": {
        "id": "dN26YDY9NEZT"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Downloading the NSRDB dataset\n",
        "# fs = 128\n",
        "! wget -r -N -c -np https://physionet.org/files/nsrdb/1.0.0/"
      ],
      "metadata": {
        "id": "EEALFsVDObGf"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Downloading the PTBDB dataset\n",
        "# fs = 1000\n",
        "# Containing Anomaly\n",
        "# Should get all possible EKMs\n",
        "! wget -r -N -c -np https://physionet.org/files/ptbdb/1.0.0/"
      ],
      "metadata": {
        "id": "dqlMn3Yg8B1n"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Labeling (MIT-BIHDB)"
      ],
      "metadata": {
        "id": "afJWgDdpQy4o"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def health_checker(path):\n",
        "  \"\"\"Classify a header file as \"healthy\" or \"cvd\".\n",
        "\n",
        "  The file at `path` is read as text; the result is \"healthy\" when the\n",
        "  substring \"None\" appears anywhere in it and \"cvd\" otherwise.\n",
        "  \"\"\"\n",
        "  with open(path, \"r\") as header_file:\n",
        "    header_text = header_file.read()\n",
        "\n",
        "  return \"healthy\" if \"None\" in header_text else \"cvd\""
      ],
      "metadata": {
        "id": "rNcxrWjDSuMk"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Checked\n",
        "health_checker(\"/content/physionet.org/files/mitdb/1.0.0/113.hea\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "yUSLjeczTwYw",
        "outputId": "67e5f215-12ee-45d1-d1ec-ee8b1b4a5fed"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'healthy'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Extracting labels of users (saving in users_labels variable)\n",
        "# healthy users are labeled 1 and others labeled 0\n",
        "source_path = \"/content/physionet.org/files/mitdb/1.0.0/\"\n",
        "label_codes = {\"healthy\": 1, \"cvd\": 0}\n",
        "users_labels = {}\n",
        "\n",
        "for user_file in os.listdir(source_path):\n",
        "  # Only the .hea header files are inspected; everything else is skipped\n",
        "  if user_file.split(\".\")[-1] != \"hea\":\n",
        "    continue\n",
        "\n",
        "  user_id = user_file.split(\".\")[0]\n",
        "  # Read and classify each header exactly once\n",
        "  status = health_checker(os.path.join(source_path, user_file))\n",
        "  if status in label_codes:\n",
        "    users_labels[user_id] = label_codes[status]"
      ],
      "metadata": {
        "id": "h763_GUmQ3Ae"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Checked\n",
        "users_labels[\"113\"]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CcDVIVmiRPQt",
        "outputId": "d5bfaf1d-b7d8-448d-c52a-e2f1b2019231"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "1"
            ]
          },
          "metadata": {},
          "execution_count": 10
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Getting healthy users (ids whose label is 1), in sorted order\n",
        "healthy_users = sorted(\n",
        "  user_id for user_id, label in users_labels.items() if label == 1\n",
        ")\n",
        "print(healthy_users)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "P_FAQy_FWVOo",
        "outputId": "1aa3d586-a48e-4e84-a43b-94f77f2c0680"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['113', '115', '116', '117', '208', '210', '212', '215', '223', '231', '234']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Healthy users in MIT-DB are users with following ids:\n",
        "\n",
        "'113', '115', '116', '117', '208', '210', '212', '215', '223', '231', '234'"
      ],
      "metadata": {
        "id": "Ym-pbmpYEQWQ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "healthy_users = ['113', '115', '116', '117', '208', '210', '212', '215', '223', '231', '234']"
      ],
      "metadata": {
        "id": "gdeItINksSJI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Mixing datasets"
      ],
      "metadata": {
        "id": "i_bGpsAKNo1G"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Mixing (MIT-DB) and (NSRDB)"
      ],
      "metadata": {
        "id": "6-7cHlpq9slN"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Extracting MIT-DB dataset"
      ],
      "metadata": {
        "id": "n1oF_-OYYqt8"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "! ls /content/drive/MyDrive/ECG\\ project/"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-WY3hONzL9C3",
        "outputId": "10bb34dd-ee75-4b50-83f9-6ab50c322139"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            " EKM_MIT_DB_5bpf.tar.gz   EKM_PTBDB_5bpf.tar.gz\n",
            " EKM_NSRDB_5bpf.tar.gz\t 'N. Mokhtari'\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "OWBJF4nHB_ts"
      },
      "outputs": [],
      "source": [
        "# Extracting the MIT-DB dataset's .tar.gz file\n",
        "! tar -xzvf \"/content/drive/MyDrive/ECG project/EKM_MIT_DB_5bpf.tar.gz\""
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Just changing the name of the extracted folder to EKM_MITDB_dataset\n",
        "! mv EKM_dataset EKM_MITDB_dataset"
      ],
      "metadata": {
        "id": "iHkHX2pcEm3Y"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Moving the train and test EKMs into one folder\n",
        "! mv EKM_MITDB_dataset/train/* EKM_MITDB_dataset/\n",
        "! mv EKM_MITDB_dataset/test/* EKM_MITDB_dataset/"
      ],
      "metadata": {
        "id": "ZJIcsYSNFGyz"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "! rm -r EKM_MITDB_dataset/train/\n",
        "! rm -r EKM_MITDB_dataset/test/"
      ],
      "metadata": {
        "id": "6lGe8PkKFsTh"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "healthy_users"
      ],
      "metadata": {
        "id": "fiht-UFlYmFQ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "8a78a50b-951a-4e94-8cbc-df1c877e7e03"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['113', '115', '116', '117', '208', '210', '212', '215', '223', '231', '234']"
            ]
          },
          "metadata": {},
          "execution_count": 18
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Counting the entries of the MITDB EKM folder that belong to healthy users\n",
        "source_path = \"/content/EKM_MITDB_dataset\"\n",
        "ekms_files_list = os.listdir(source_path)\n",
        "\n",
        "# The user id is the second-to-last \"-\"-separated field of each file name\n",
        "healthy_users_counter = sum(\n",
        "  1 for ekm_name in ekms_files_list if ekm_name.split(\"-\")[-2] in healthy_users\n",
        ")\n",
        "# Everything that is not attributed to a healthy user is counted as CVD\n",
        "cvd_users_count = len(ekms_files_list) - healthy_users_counter\n",
        "\n",
        "print(f\"Number of healthy users in MITDB:\\t {healthy_users_counter}\")\n",
        "print(f\"Number of users with CVD in MITDB:\\t {cvd_users_count}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "j0PUWkXWHjM3",
        "outputId": "d9eaae42-a1ab-4c38-ebac-e679cd1a3c5c"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Number of healthy users in MITDB:\t 5203\n",
            "Number of users with CVD in MITDB:\t 16410\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Copying MITDB dataset into bowl\n",
        "! cp EKM_MITDB_dataset/* bowl/"
      ],
      "metadata": {
        "id": "ohkSwFvIKTj0"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Extracting the NSRDB dataset"
      ],
      "metadata": {
        "id": "s3bMKJF-K7Hj"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Extracting the NSRDB dataset's .tar.gz file\n",
        "! tar -xzvf \"/content/drive/MyDrive/ECG project/EKM_NSRDB_5bpf.tar.gz\""
      ],
      "metadata": {
        "id": "UwYJRofmLBGm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Just changing the name of the extracted folder to EKM_NSRDB_dataset\n",
        "! mv EKM_dataset EKM_NSRDB_dataset"
      ],
      "metadata": {
        "id": "MxR1G7d5LXq1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Moving the train and test EKMs into one folder\n",
        "! find EKM_NSRDB_dataset/train/ -name '*' -exec mv {} EKM_NSRDB_dataset/ \\;\n",
        "! mv EKM_NSRDB_dataset/test/* EKM_NSRDB_dataset/"
      ],
      "metadata": {
        "id": "dL98icAnLXq4"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "! rm -r EKM_NSRDB_dataset/train/\n",
        "! rm -r EKM_NSRDB_dataset/test/"
      ],
      "metadata": {
        "id": "jKPNe-kwLXq6"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Mixing with same proportion of healthy users and ill users"
      ],
      "metadata": {
        "id": "AjMTOfpkO4ue"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "amount_of_ekms_needed_from_NSRDB = cvd_users_count - healthy_users_counter"
      ],
      "metadata": {
        "id": "aWWwPKYWPEBL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Getting ekms needed from NSRDB randomly\n",
        "source_path = \"/content/EKM_NSRDB_dataset\"\n",
        "# os.listdir returns entries in arbitrary order, so sort them to make the seeded draw repeatable\n",
        "all_ekms_in_NSRDB = sorted(os.listdir(source_path))\n",
        "\n",
        "# Fail with an explicit message when the requested amount cannot be drawn\n",
        "if not 0 <= amount_of_ekms_needed_from_NSRDB <= len(all_ekms_in_NSRDB):\n",
        "  raise ValueError(\n",
        "    f\"Cannot draw {amount_of_ekms_needed_from_NSRDB} EKMs from \"\n",
        "    f\"{len(all_ekms_in_NSRDB)} available NSRDB EKMs\"\n",
        "  )\n",
        "\n",
        "# A dedicated seeded generator gives the same sample on every run\n",
        "# without touching the global random state\n",
        "random_ekms_from_NSRDB = random.Random(42).sample(all_ekms_in_NSRDB, amount_of_ekms_needed_from_NSRDB)"
      ],
      "metadata": {
        "id": "dDiL19jAQz25"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "len(random_ekms_from_NSRDB)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2PuKVrCRbFQC",
        "outputId": "51bcfe92-63c2-4460-dae3-1b5470ed2b1e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "11207"
            ]
          },
          "metadata": {},
          "execution_count": 50
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Copy the sampled NSRDB EKMs into the bowl in-process instead of\n",
        "# launching one shell command per file\n",
        "bowl_path = \"/content/bowl\"\n",
        "# Make sure the target is a directory; otherwise each copy would overwrite a plain file\n",
        "os.makedirs(bowl_path, exist_ok=True)\n",
        "\n",
        "for ekm in random_ekms_from_NSRDB:\n",
        "  shutil.copy(os.path.join(source_path, ekm), bowl_path)"
      ],
      "metadata": {
        "id": "bozRFAXmTGBJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Mixed dataset EKMs' amount\n",
        "len(os.listdir(\"/content/bowl\"))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_cUbAcn5a7qJ",
        "outputId": "1e34b505-7750-4f59-92fe-b9358f1643cb"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "32821"
            ]
          },
          "metadata": {},
          "execution_count": 51
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Vectorizing the dataset"
      ],
      "metadata": {
        "id": "DnlSWAmeXMfh"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# We can label the images virtually,\n",
        "# in this way: when numericalizing the labels\n",
        "# we can add the number of users of the previous dataset\n",
        "# when we want to authenticate them."
      ],
      "metadata": {
        "id": "lxI2nxFeXTQC"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# For labeling cvd/healthy (0 or 1) just use dataset's name\n",
        "# and the users id(key)"
      ],
      "metadata": {
        "id": "DeYR5sW8X1Z4"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "6bMmj29cF3UK"
      },
      "outputs": [],
      "source": [
        "def vertorizing_png_imges(address, desired_width=33, desired_height=21):\n",
        "  \"\"\"Load an image file and turn it into a normalized RGB array.\n",
        "\n",
        "  Args:\n",
        "    address: Path of the image file to open.\n",
        "    desired_width: Width, in pixels, the image is resized to (default 33).\n",
        "    desired_height: Height, in pixels, the image is resized to (default 21).\n",
        "\n",
        "  Returns:\n",
        "    A float32 NumPy array of shape (desired_height, desired_width, 3)\n",
        "    whose values are the 8-bit channel values divided by 255.\n",
        "  \"\"\"\n",
        "  # The context manager releases the file handle even if decoding fails\n",
        "  with Image.open(address) as source_image:\n",
        "    # Force three channels, then resize to the CNN input size (PIL expects (width, height))\n",
        "    rgb_image = source_image.convert('RGB').resize((desired_width, desired_height))\n",
        "\n",
        "  # Convert to a NumPy array and normalize\n",
        "  return np.asarray(rgb_image, dtype='float32') / 255.0"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "2eFMJYWjWavo"
      },
      "outputs": [],
      "source": [
        "X_dataset = []\n",
        "y_dataset = []"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "YbuaQU2PrmDM"
      },
      "outputs": [],
      "source": [
        "from IPython.display import clear_output\n",
        "\n",
        "def progress_bar(index, total_length=None, bar_length=50):\n",
        "  \"\"\"Redraw a text progress bar in the current cell's output.\n",
        "\n",
        "  Args:\n",
        "    index: Position of the item that was just processed.\n",
        "    total_length: Number of items overall. When omitted, the entries of\n",
        "      /content/bowl are counted on every call, which is slow for big folders.\n",
        "    bar_length: Width of the bar in characters.\n",
        "  \"\"\"\n",
        "  if total_length is None:\n",
        "    total_length = len(os.listdir(\"/content/bowl\"))\n",
        "\n",
        "  # At least 1, so having fewer items than bar slots cannot divide by zero\n",
        "  step = max(1, total_length // bar_length)\n",
        "  # Never fill more slots than the bar has\n",
        "  progress = min(bar_length, index // step)\n",
        "\n",
        "  # Clear the current cell's output\n",
        "  clear_output(wait=True)\n",
        "\n",
        "  print(\"[\" + \"*\" * progress + \"-\" * (bar_length - progress) + \"]\")\n",
        "  print(f\"{index}/{total_length}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2bJxVqkknfiu",
        "outputId": "94c182ff-2dea-49bc-841f-d9ba88e52920"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[**************************************************]\n",
            "32820/32821\n",
            "This cell took 28 minutes to run.\n"
          ]
        }
      ],
      "source": [
        "# X data for cnn network input\n",
        "base_path = \"/content/bowl/\"\n",
        "images_names = os.listdir(base_path)\n",
        "\n",
        "# Start from empty lists so re-running this cell does not append duplicates\n",
        "X_dataset = []\n",
        "y_dataset = []\n",
        "\n",
        "# Redrawing the bar is costly (it clears the output and re-counts the folder),\n",
        "# so it is refreshed only every `progress_every` images and on the last entry\n",
        "progress_every = 500\n",
        "last_index = len(images_names) - 1\n",
        "\n",
        "before_run_time = datetime.now()\n",
        "\n",
        "# Get X_dataset by vectorization\n",
        "# and y_dataset by name of the datasets and user ids\n",
        "for index, img_name in enumerate(images_names):\n",
        "\n",
        "  # Checking if the file is an image or not\n",
        "  if img_name.split(\".\")[-1] != \"png\":\n",
        "    continue\n",
        "\n",
        "  X_dataset.append(vertorizing_png_imges(base_path + img_name))\n",
        "\n",
        "  # NOTE(review): file names are assumed to be \"-\"-separated, with the dataset name\n",
        "  # fourth from the end and the user id second from the end - confirm\n",
        "  name_parts = img_name.split(\"-\")\n",
        "  user_id = name_parts[-2]\n",
        "  dataset_name = name_parts[-4]\n",
        "\n",
        "  # labeling 1 for healthy users and 0 for user with cvd\n",
        "  if dataset_name == \"NSRDB\":\n",
        "    y_dataset.append(1)\n",
        "  elif (dataset_name == \"MITDB\") and (user_id in healthy_users):\n",
        "    y_dataset.append(1)\n",
        "  else:\n",
        "    y_dataset.append(0)\n",
        "\n",
        "  if index % progress_every == 0 or index == last_index:\n",
        "    progress_bar(index)\n",
        "\n",
        "after_run_time = datetime.now()\n",
        "diff = after_run_time - before_run_time\n",
        "# total_seconds() also covers whole days, which the .seconds attribute leaves out\n",
        "print(f\"This cell took {int(diff.total_seconds() / 60)} minutes to run.\")"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "X_dataset = np.array(X_dataset)\n",
        "y_dataset = np.array(y_dataset)"
      ],
      "metadata": {
        "id": "w618PDc-tk0g"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1NJ1mx2jZgco",
        "outputId": "220fc662-7618-47bb-9b25-13876ed6a245"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([0, 1])"
            ]
          },
          "metadata": {},
          "execution_count": 14
        }
      ],
      "source": [
        "np.unique(y_dataset)"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# CNN Architecture"
      ],
      "metadata": {
        "id": "yn8GiD7VT1Eu"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Model is mixture of (MITDB & NSRDB)"
      ],
      "metadata": {
        "id": "EDMdFVrjHg1t"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "# Hold out 20% of the samples as a test set; random_state=42 fixes the split across runs\n",
        "X_train, X_test, y_train, y_test = train_test_split(X_dataset, y_dataset, test_size=0.2, random_state=42)"
      ],
      "metadata": {
        "id": "JjRWnXyXuvKA"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FAUdHRLox-Ke"
      },
      "outputs": [],
      "source": [
        "from tensorflow.keras.models import Sequential\n",
        "from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout\n",
        "from tensorflow.keras.optimizers import Adam"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "T-P9sACYVBMD"
      },
      "outputs": [],
      "source": [
        "# Creating the CNN model\n",
        "model = Sequential([\n",
        "    Conv2D(32, (3, 3), activation='relu', input_shape=(21, 33, 3)),\n",
        "    MaxPooling2D(pool_size=(2, 2)),\n",
        "    Dropout(0.7),\n",
        "    Flatten(),\n",
        "    Dense(128, activation='relu'),\n",
        "    Dense(64, activation='relu'),\n",
        "    Dense(2, activation='softmax')\n",
        "])\n",
        "\n",
        "# Setting Adam optimizer\n",
        "optimizer = Adam(learning_rate=0.001)\n",
        "\n",
        "# Compileing the model with the optimizer\n",
        "model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "o-xquUYwMUxN",
        "outputId": "165e2843-ecb2-41b5-dc66-12c6384d0917"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 1/10\n",
            "1276/1276 [==============================] - 33s 25ms/step - loss: 0.2989 - accuracy: 0.8609\n",
            "Epoch 2/10\n",
            "1276/1276 [==============================] - 32s 25ms/step - loss: 0.1737 - accuracy: 0.9270\n",
            "Epoch 3/10\n",
            "1276/1276 [==============================] - 31s 24ms/step - loss: 0.1117 - accuracy: 0.9558\n",
            "Epoch 4/10\n",
            "1276/1276 [==============================] - 30s 24ms/step - loss: 0.0797 - accuracy: 0.9693\n",
            "Epoch 5/10\n",
            "1276/1276 [==============================] - 32s 25ms/step - loss: 0.0646 - accuracy: 0.9762\n",
            "Epoch 6/10\n",
            "1276/1276 [==============================] - 32s 25ms/step - loss: 0.0545 - accuracy: 0.9799\n",
            "Epoch 7/10\n",
            "1276/1276 [==============================] - 32s 25ms/step - loss: 0.0465 - accuracy: 0.9837\n",
            "Epoch 8/10\n",
            "1276/1276 [==============================] - 31s 25ms/step - loss: 0.0449 - accuracy: 0.9839\n",
            "Epoch 9/10\n",
            "1276/1276 [==============================] - 30s 23ms/step - loss: 0.0410 - accuracy: 0.9858\n",
            "Epoch 10/10\n",
            "1276/1276 [==============================] - 31s 24ms/step - loss: 0.0365 - accuracy: 0.9873\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<keras.callbacks.History at 0x7ed31bd32e00>"
            ]
          },
          "metadata": {},
          "execution_count": 18
        }
      ],
      "source": [
        "# Train the model\n",
        "model.fit(X_train, y_train, epochs=10, batch_size=32)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "sc36-Oqn6MY3",
        "outputId": "e9baa6eb-1a1f-4975-a10d-548d7813dcf2"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "319/319 - 2s - loss: 0.0169 - accuracy: 0.9948 - 2s/epoch - 6ms/step\n",
            "Test Loss: 0.0169\n",
            "Test Accuracy: 0.9948\n"
          ]
        }
      ],
      "source": [
        "# Evaluate the model on the test set\n",
        "test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)\n",
        "print(f'Test Loss: {test_loss:.4f}')\n",
        "print(f'Test Accuracy: {test_accuracy:.4f}')"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Calculate the proportion of each unique value\n",
        "unique_values, counts = np.unique(y_test, return_counts=True)\n",
        "proportions = counts / len(y_test)\n",
        "\n",
        "# Create a dictionary to store the unique values and their proportions\n",
        "value_proportions = dict(zip(unique_values, proportions))\n",
        "\n",
        "# Print the value proportions\n",
        "for value, proportion in value_proportions.items():\n",
        "    print(f\"Value: {value}, Proportion: {proportion:.2f}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "oUyKRO5Z9y6a",
        "outputId": "24b2ba89-682b-4d52-f49a-7b5635ded169"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Value: 0, Proportion: 0.50\n",
            "Value: 1, Proportion: 0.50\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "ONG50HFTJlOf"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Testing with PTBDB"
      ],
      "metadata": {
        "id": "0Z7ZTRu3Jm4t"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Gathering .dat files of PTB dataset\n",
        "# For PTB dataset\n",
        "\n",
        "# Recreate the destination folder from scratch so files from an earlier run never linger\n",
        "ptbdb_files_path = \"/content/ptbdb_dat_files\"\n",
        "shutil.rmtree(ptbdb_files_path, ignore_errors=True)\n",
        "os.makedirs(ptbdb_files_path)\n",
        "\n",
        "ecg_mine_path = \"/content/physionet.org/files/ptbdb/1.0.0\"\n",
        "ptb_labeling_dict = {}\n",
        "ptb_labeling_file_to_user = {}\n",
        "\n",
        "files_list = os.listdir(ecg_mine_path)\n",
        "# Extracting ECG files of patients and saving them\n",
        "# into /content/ptbdb_dat_files directory\n",
        "for patient_folder in files_list:\n",
        "  # checking if the folder is a patient's files or not:\n",
        "  # the name must be \"patient\" followed by exactly three characters\n",
        "  if patient_folder[:-3] != \"patient\":\n",
        "    continue\n",
        "\n",
        "  # key is user's id\n",
        "  key = patient_folder[-3:]\n",
        "  ptb_labeling_dict[key] = []\n",
        "  patient_path = os.path.join(ecg_mine_path, patient_folder)\n",
        "  for f in os.listdir(patient_path):\n",
        "    file_path = os.path.join(patient_path, f)\n",
        "    # Like a plain `cp`, copy regular files only and leave sub-directories alone\n",
        "    if os.path.isfile(file_path):\n",
        "      shutil.copy(file_path, ptbdb_files_path)\n",
        "    ptb_labeling_dict[key].append(f)\n",
        "    ptb_labeling_file_to_user[f] = key"
      ],
      "metadata": {
        "id": "XGo3P3tMEOVu"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "ecg_mine_path = \"/content/ptbdb_dat_files\"\n",
        "\n",
        "files_list = os.listdir(ecg_mine_path)\n",
        "healthy_controls = []\n",
        "\n",
        "# Collect the record name of every .hea file whose text contains \"Healthy control\"\n",
        "for f in files_list:\n",
        "  name_parts = f.split(\".\")\n",
        "  if name_parts[-1] != \"hea\":\n",
        "    continue\n",
        "\n",
        "  with open(f\"{ecg_mine_path}/{f}\", \"r\") as hea_file:\n",
        "    if \"Healthy control\" in hea_file.read():\n",
        "      healthy_controls.append(name_parts[0])"
      ],
      "metadata": {
        "id": "yDAl-SzpFi2G"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Map each healthy record name back to the id of the patient folder it came from\n",
        "healthy_users_id = [\n",
        "  ptb_labeling_file_to_user[f\"{record_name}.dat\"] for record_name in healthy_controls\n",
        "]"
      ],
      "metadata": {
        "id": "9gVBSkboJHa3"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# unique healthy users\n",
        "len(set(healthy_users_id))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "61gYEyLvKSgv",
        "outputId": "692bf8fd-ffeb-47dc-9aab-f40b9ddb220b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "52"
            ]
          },
          "metadata": {},
          "execution_count": 47
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "healthy_users_id[0]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "9lyUDsAxN5an",
        "outputId": "138ab358-b546-4d7b-ecd2-8b1f9c259764"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'252'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 62
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "healthy_users_id = list(set(healthy_users_id))"
      ],
      "metadata": {
        "id": "nqiyL2KZLB0L"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "hzt0F3iRL3Jt"
      },
      "outputs": [],
      "source": [
        "# Extracting the PTBDB dataset's .tar.gz file\n",
        "! tar -xzvf \"/content/drive/MyDrive/ECG project/EKM_PTBDB_5bpf.tar.gz\""
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Just changing the name of the extracted folder to EKM_PTBDB_dataset\n",
        "! mv EKM_dataset EKM_PTBDB_dataset"
      ],
      "metadata": {
        "id": "5QRSSmL3L3Jw"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Moving the train and test EKMs into one folder\n",
        "! mv EKM_PTBDB_dataset/train/* EKM_PTBDB_dataset/\n",
        "! mv EKM_PTBDB_dataset/test/* EKM_PTBDB_dataset/"
      ],
      "metadata": {
        "id": "UUUFAgAxMyuk"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "! rm -r EKM_PTBDB_dataset/train/\n",
        "! rm -r EKM_PTBDB_dataset/test/"
      ],
      "metadata": {
        "id": "LCEUwl0qMyul"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Getting 1000 ekms from PTBDB randomly for test\n",
        "source_path = \"/content/EKM_PTBDB_dataset/\"\n",
        "# os.listdir returns entries in arbitrary order, so sort them to make the seeded draw repeatable\n",
        "all_ekms_in_PTBDB = sorted(os.listdir(source_path))\n",
        "\n",
        "# Cap the sample size so a folder with fewer than 1000 entries does not make sample() raise\n",
        "ptb_sample_size = min(1000, len(all_ekms_in_PTBDB))\n",
        "# A dedicated seeded generator gives the same sample on every run\n",
        "random_ekms_from_PTBDB = random.Random(42).sample(all_ekms_in_PTBDB, ptb_sample_size)"
      ],
      "metadata": {
        "id": "mXGDByo8MQgf"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "ptb_test_x = []\n",
        "ptb_test_y = []"
      ],
      "metadata": {
        "id": "wNqHXUPtNBdO"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Start from empty lists so re-running this cell does not append duplicates\n",
        "ptb_test_x = []\n",
        "ptb_test_y = []\n",
        "\n",
        "# A set makes the per-image membership test constant-time\n",
        "healthy_ids_lookup = set(healthy_users_id)\n",
        "\n",
        "# NOTE(review): this builds the test set from every PTBDB EKM; the random sample\n",
        "# random_ekms_from_PTBDB is not used here - confirm which one is intended\n",
        "for ekm in all_ekms_in_PTBDB:\n",
        "  # Skip anything that is not a PNG, as done when vectorizing the mixed dataset\n",
        "  if ekm.split(\".\")[-1] != \"png\":\n",
        "    continue\n",
        "\n",
        "  ptb_test_x.append(vertorizing_png_imges(source_path + ekm))\n",
        "\n",
        "  # labeling 1 for healthy users and 0 for everyone else;\n",
        "  # the user id is the second-to-last \"-\"-separated field of the file name\n",
        "  user_id = ekm.split(\"-\")[-2]\n",
        "  ptb_test_y.append(1 if user_id in healthy_ids_lookup else 0)"
      ],
      "metadata": {
        "id": "JlT-UEKhNDNP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "ptb_test_x = np.array(ptb_test_x)\n",
        "ptb_test_y = np.array(ptb_test_y)"
      ],
      "metadata": {
        "id": "_nimNsx9OMnX"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1d9ac7e8-bc08-4ca8-fde2-7a9804adf7db",
        "id": "hG8JzdnNOGhK"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "291/291 - 2s - loss: 0.8868 - accuracy: 0.8242 - 2s/epoch - 6ms/step\n",
            "Test Loss: 0.8868\n",
            "Test Accuracy: 0.8242\n"
          ]
        }
      ],
      "source": [
        "# Evaluate the model on the test set\n",
        "test_loss, test_accuracy = model.evaluate(ptb_test_x, ptb_test_y, verbose=2)\n",
        "print(f'Test Loss: {test_loss:.4f}')\n",
        "print(f'Test Accuracy: {test_accuracy:.4f}')"
      ]
    }
  ]
}